1 Load Package

# install.packages("")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   1.4.0     ✔ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(patchwork) # for combining multiple plots into one layout

2 Data

2.3 Read Data

skip first 2 rows

# Read the three 2021 Census CSV exports. skip = 2 discards the first two
# lines of each file, so the third line supplies the column headers.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Victoria_pop_age_gender_2021 <- read.csv(
  "Pop-Age-Gender-Victoria-2021-Census-2021A00055917034_5.csv",
  skip = 2, header = TRUE
)
BC_pop_2021 <- read.csv(
  "BC-Pop-2021A000259_6.csv",
  skip = 2, header = TRUE
)
Canada_senior_pop_2021 <- read.csv(
  "Canada-Senior-pop-percentage-2021A000259_5.csv",
  skip = 2, header = TRUE
)

skip last 12 rows

# Drop the final 12 rows of each table: a negative n makes head() return
# everything except the last |n| rows.
# NOTE(review): presumably these rows are non-data footer lines of the
# export; confirm against the CSV files.
Victoria_pop_age_gender_2021 <- head(Victoria_pop_age_gender_2021, n = -12)
BC_pop_2021 <- head(BC_pop_2021, n = -12)
Canada_senior_pop_2021 <- head(Canada_senior_pop_2021, n = -12)

2.4 Analyze data

# Check the object class of the Victoria import, then its column types.
class(Victoria_pop_age_gender_2021)
## [1] "data.frame"
str(Victoria_pop_age_gender_2021)
## 'data.frame':    42 obs. of  3 variables:
##  $ Age.groups       : chr  "0 to 4" "0 to 4" "5 to 9" "5 to 9" ...
##  $ Gender           : chr  "Women+" "Men+" "Women+" "Men+" ...
##  $ Population.counts: int  1400 1510 1440 1395 1365 1340 1340 1375 3475 2565 ...
# Column names as imported, followed by a full listing of the table.
colnames(Victoria_pop_age_gender_2021)
## [1] "Age.groups"        "Gender"            "Population.counts"
print(Victoria_pop_age_gender_2021)
##    Age.groups Gender Population.counts
## 1      0 to 4 Women+              1400
## 2      0 to 4   Men+              1510
## 3      5 to 9 Women+              1440
## 4      5 to 9   Men+              1395
## 5    10 to 14 Women+              1365
## 6    10 to 14   Men+              1340
## 7    15 to 19 Women+              1340
## 8    15 to 19   Men+              1375
## 9    20 to 24 Women+              3475
## 10   20 to 24   Men+              2565
## 11   25 to 29 Women+              4525
## 12   25 to 29   Men+              4235
## 13   30 to 34 Women+              4490
## 14   30 to 34   Men+              4545
## 15   35 to 39 Women+              3635
## 16   35 to 39   Men+              3820
## 17   40 to 44 Women+              2945
## 18   40 to 44   Men+              3035
## 19   45 to 49 Women+              2570
## 20   45 to 49   Men+              2635
## 21   50 to 54 Women+              2705
## 22   50 to 54   Men+              2640
## 23   55 to 59 Women+              2895
## 24   55 to 59   Men+              2665
## 25   60 to 64 Women+              3205
## 26   60 to 64   Men+              2755
## 27   65 to 69 Women+              3345
## 28   65 to 69   Men+              2675
## 29   70 to 74 Women+              3230
## 30   70 to 74   Men+              2675
## 31   75 to 79 Women+              2015
## 32   75 to 79   Men+              1685
## 33   80 to 84 Women+              1430
## 34   80 to 84   Men+               985
## 35   85 to 89 Women+              1065
## 36   85 to 89   Men+               605
## 37   90 to 94 Women+               890
## 38   90 to 94   Men+               350
## 39   95 to 99 Women+               275
## 40   95 to 99   Men+                80
## 41       100+ Women+                40
## 42       100+   Men+                10
# Compact per-column overview of the Victoria table.
glimpse(Victoria_pop_age_gender_2021)
## Rows: 42
## Columns: 3
## $ Age.groups        <chr> "0 to 4", "0 to 4", "5 to 9", "5 to 9", "10 to 14", …
## $ Gender            <chr> "Women+", "Men+", "Women+", "Men+", "Women+", "Men+"…
## $ Population.counts <int> 1400, 1510, 1440, 1395, 1365, 1340, 1340, 1375, 3475…
# Check the object class of the BC import, then its column types.
class(BC_pop_2021)
## [1] "data.frame"
str(BC_pop_2021)
## 'data.frame':    42 obs. of  3 variables:
##  $ Age.groups       : chr  "0 to 4" "0 to 4" "5 to 9" "5 to 9" ...
##  $ Gender           : chr  "Women+" "Men+" "Women+" "Men+" ...
##  $ Population.counts: int  105805 111015 118485 125810 123365 132425 122445 131245 143145 151505 ...
# Column names as imported, followed by a full listing of the table.
colnames(BC_pop_2021)
## [1] "Age.groups"        "Gender"            "Population.counts"
print(BC_pop_2021)
##    Age.groups Gender Population.counts
## 1      0 to 4 Women+            105805
## 2      0 to 4   Men+            111015
## 3      5 to 9 Women+            118485
## 4      5 to 9   Men+            125810
## 5    10 to 14 Women+            123365
## 6    10 to 14   Men+            132425
## 7    15 to 19 Women+            122445
## 8    15 to 19   Men+            131245
## 9    20 to 24 Women+            143145
## 10   20 to 24   Men+            151505
## 11   25 to 29 Women+            166585
## 12   25 to 29   Men+            170500
## 13   30 to 34 Women+            179660
## 14   30 to 34   Men+            178925
## 15   35 to 39 Women+            178175
## 16   35 to 39   Men+            175175
## 17   40 to 44 Women+            163260
## 18   40 to 44   Men+            156480
## 19   45 to 49 Women+            160515
## 20   45 to 49   Men+            149520
## 21   50 to 54 Women+            170460
## 22   50 to 54   Men+            158540
## 23   55 to 59 Women+            183825
## 24   55 to 59   Men+            172520
## 25   60 to 64 Women+            184920
## 26   60 to 64   Men+            170210
## 27   65 to 69 Women+            166700
## 28   65 to 69   Men+            152705
## 29   70 to 74 Women+            141995
## 30   70 to 74   Men+            130370
## 31   75 to 79 Women+             95015
## 32   75 to 79   Men+             86060
## 33   80 to 84 Women+             65780
## 34   80 to 84   Men+             55285
## 35   85 to 89 Women+             42005
## 36   85 to 89   Men+             32315
## 37   90 to 94 Women+             23120
## 38   90 to 94   Men+             13720
## 39   95 to 99 Women+              6925
## 40   95 to 99   Men+              2865
## 41       100+ Women+              1185
## 42       100+   Men+               330
# Compact per-column overview of the BC table.
glimpse(BC_pop_2021)
## Rows: 42
## Columns: 3
## $ Age.groups        <chr> "0 to 4", "0 to 4", "5 to 9", "5 to 9", "10 to 14", …
## $ Gender            <chr> "Women+", "Men+", "Women+", "Men+", "Women+", "Men+"…
## $ Population.counts <int> 105805, 111015, 118485, 125810, 123365, 132425, 1224…
# Compact per-column overview of the Canada senior-percentage table.
glimpse(Canada_senior_pop_2021)
## Rows: 26
## Columns: 3
## $ Geography                        <chr> "Newfoundland and Labrador", "Newfoun…
## $ Years                            <chr> "2016", "2021", "2016", "2021", "2016…
## $ Proportion.of.the.population.... <dbl> 19.4, 23.6, 19.4, 21.2, 19.9, 22.2, 1…

3 Data cleaning

3.1 Change values for Women+ Men+

# Strip the trailing "+" from the census gender labels. Every cell of each
# table is compared, and only cells that are exactly "Women+" or "Men+"
# are overwritten.
Victoria_pop_age_gender_2021 <- replace(
  Victoria_pop_age_gender_2021,
  Victoria_pop_age_gender_2021 == "Women+", "Women"
)
Victoria_pop_age_gender_2021 <- replace(
  Victoria_pop_age_gender_2021,
  Victoria_pop_age_gender_2021 == "Men+", "Men"
)

BC_pop_2021 <- replace(BC_pop_2021, BC_pop_2021 == "Women+", "Women")
BC_pop_2021 <- replace(BC_pop_2021, BC_pop_2021 == "Men+", "Men")

3.2 Change column name

# Replace the imported column names with short names; both population
# tables share the same three names.
Victoria_pop_age_gender_2021 <- setNames(
  Victoria_pop_age_gender_2021, c("age_group", "gender", "pop")
)
BC_pop_2021 <- setNames(BC_pop_2021, c("age_group", "gender", "pop"))
Canada_senior_pop_2021 <- setNames(
  Canada_senior_pop_2021, c("Province", "Year", "PercentageOfPop")
)

3.3 Sort values of age_group to ensure 100+ appears in correct position

# Convert age_group to a factor whose levels are the distinct labels in
# natural numeric order (str_sort(..., numeric = TRUE)), so that "100+"
# is not placed next to "10 to 14" as plain text sorting would do.
Victoria_pop_age_gender_2021$age_group <- with(
  Victoria_pop_age_gender_2021,
  factor(age_group, levels = str_sort(unique(age_group), numeric = TRUE))
)
str(Victoria_pop_age_gender_2021)
## 'data.frame':    42 obs. of  3 variables:
##  $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ gender   : chr  "Women" "Men" "Women" "Men" ...
##  $ pop      : int  1400 1510 1440 1395 1365 1340 1340 1375 3475 2565 ...
# Same conversion for the BC table: factor levels are the distinct age
# labels sorted in natural numeric order.
BC_pop_2021$age_group <- with(
  BC_pop_2021,
  factor(age_group, levels = str_sort(unique(age_group), numeric = TRUE))
)
str(BC_pop_2021)
## 'data.frame':    42 obs. of  3 variables:
##  $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ gender   : chr  "Women" "Men" "Women" "Men" ...
##  $ pop      : int  105805 111015 118485 125810 123365 132425 122445 131245 143145 151505 ...

4 Senior Age Groups

4.1 Select rows by value

# Keep only the rows belonging to the ten age groups from "55 to 59"
# up to "100+"; all columns are retained.
Victoria_pop_age_gender_2021_seniors <- subset(
  Victoria_pop_age_gender_2021,
  age_group %in% c(
    "55 to 59", "60 to 64", "65 to 69", "70 to 74", "75 to 79",
    "80 to 84", "85 to 89", "90 to 94", "95 to 99", "100+"
  )
)

BC_pop_2021_seniors <- subset(
  BC_pop_2021,
  age_group %in% c(
    "55 to 59", "60 to 64", "65 to 69", "70 to 74", "75 to 79",
    "80 to 84", "85 to 89", "90 to 94", "95 to 99", "100+"
  )
)

4.2 create new pop column with commas

to use for labels

# Add a character column pop_comma holding pop formatted as an integer with
# "," as the thousands separator (e.g. "1,400"). It is kept separate from
# pop so the numeric column remains available for the y axis.
BC_pop_2021[["pop_comma"]] <- formatC(
  BC_pop_2021[["pop"]], big.mark = ",", format = "d"
)
BC_pop_2021_seniors[["pop_comma"]] <- formatC(
  BC_pop_2021_seniors[["pop"]], big.mark = ",", format = "d"
)
Victoria_pop_age_gender_2021[["pop_comma"]] <- formatC(
  Victoria_pop_age_gender_2021[["pop"]], big.mark = ",", format = "d"
)
Victoria_pop_age_gender_2021_seniors[["pop_comma"]] <- formatC(
  Victoria_pop_age_gender_2021_seniors[["pop"]], big.mark = ",", format = "d"
)

5 Pivot wider

# Spread gender into separate Women / Men columns filled with pop.
# NOTE(review): pop_comma is neither names_from nor values_from, so it stays
# as an identifying column. Because it differs on every row, the result
# keeps 42 rows with NA in the other gender's column (see the str() output
# below) rather than one row per age group. TODO: drop pop_comma first or
# pass id_cols = age_group if 21 rows are intended.
Victoria_pop_age_gender_2021_wide <- pivot_wider(
  Victoria_pop_age_gender_2021,
  names_from = gender, values_from = pop
)

# One column per census year, filled with the senior percentage.
Canada_senior_pop_2021_wide <- pivot_wider(
  Canada_senior_pop_2021,
  names_from = Year, values_from = PercentageOfPop
)

6 Analyze wider data

# Check the class of the reshaped Victoria table, then its column types.
class(Victoria_pop_age_gender_2021_wide)
## [1] "tbl_df"     "tbl"        "data.frame"
str(Victoria_pop_age_gender_2021_wide)
## tibble [42 × 4] (S3: tbl_df/tbl/data.frame)
##  $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ pop_comma: chr [1:42] "1,400" "1,510" "1,440" "1,395" ...
##  $ Women    : int [1:42] 1400 NA 1440 NA 1365 NA 1340 NA 3475 NA ...
##  $ Men      : int [1:42] NA 1510 NA 1395 NA 1340 NA 1375 NA 2565 ...
# Column names after reshaping, followed by the tibble's default printout.
colnames(Victoria_pop_age_gender_2021_wide)
## [1] "age_group" "pop_comma" "Women"     "Men"
print(Victoria_pop_age_gender_2021_wide)
## # A tibble: 42 × 4
##    age_group pop_comma Women   Men
##    <fct>     <chr>     <int> <int>
##  1 0 to 4    1,400      1400    NA
##  2 0 to 4    1,510        NA  1510
##  3 5 to 9    1,440      1440    NA
##  4 5 to 9    1,395        NA  1395
##  5 10 to 14  1,365      1365    NA
##  6 10 to 14  1,340        NA  1340
##  7 15 to 19  1,340      1340    NA
##  8 15 to 19  1,375        NA  1375
##  9 20 to 24  3,475      3475    NA
## 10 20 to 24  2,565        NA  2565
## # … with 32 more rows
# Compact per-column overview of the reshaped Victoria table.
glimpse(Victoria_pop_age_gender_2021_wide)
## Rows: 42
## Columns: 4
## $ age_group <fct> 0 to 4, 0 to 4, 5 to 9, 5 to 9, 10 to 14, 10 to 14, 15 to 19…
## $ pop_comma <chr> "1,400", "1,510", "1,440", "1,395", "1,365", "1,340", "1,340…
## $ Women     <int> 1400, NA, 1440, NA, 1365, NA, 1340, NA, 3475, NA, 4525, NA, …
## $ Men       <int> NA, 1510, NA, 1395, NA, 1340, NA, 1375, NA, 2565, NA, 4235, …
# Compact per-column overview of the reshaped Canada table.
glimpse(Canada_senior_pop_2021_wide)
## Rows: 13
## Columns: 3
## $ Province <chr> "Newfoundland and Labrador", "Prince Edward Island", "Nova Sc…
## $ `2016`   <dbl> 19.4, 19.4, 19.9, 19.9, 18.3, 16.7, 15.6, 15.5, 12.3, 18.3, 1…
## $ `2021`   <dbl> 23.6, 21.2, 22.2, 22.8, 20.6, 18.5, 17.1, 17.5, 14.8, 20.3, 1…
# Column types of the reshaped Canada table.
str(Canada_senior_pop_2021_wide)
## tibble [13 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Province: chr [1:13] "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
##  $ 2016    : num [1:13] 19.4 19.4 19.9 19.9 18.3 16.7 15.6 15.5 12.3 18.3 ...
##  $ 2021    : num [1:13] 23.6 21.2 22.2 22.8 20.6 18.5 17.1 17.5 14.8 20.3 ...

7 Sort values of age_group to ensure 100+ appears in correct position

factor, str_sort unique

# Rebuild age_group as a factor with levels in natural numeric order.
# NOTE(review): the str() output before and after this step shows the same
# 21 levels starting "0 to 4", "5 to 9", so this looks redundant here;
# confirm before removing.
Victoria_pop_age_gender_2021_wide$age_group <- with(
  Victoria_pop_age_gender_2021_wide,
  factor(age_group, levels = str_sort(unique(age_group), numeric = TRUE))
)
str(Victoria_pop_age_gender_2021_wide)
## tibble [42 × 4] (S3: tbl_df/tbl/data.frame)
##  $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ pop_comma: chr [1:42] "1,400" "1,510" "1,440" "1,395" ...
##  $ Women    : int [1:42] 1400 NA 1440 NA 1365 NA 1340 NA 3475 NA ...
##  $ Men      : int [1:42] NA 1510 NA 1395 NA 1340 NA 1375 NA 2565 ...

8 Row index number

# Store the row names, converted to numbers, in a new index column.
Victoria_pop_age_gender_2021_wide[["index"]] <- as.numeric(
  rownames(Victoria_pop_age_gender_2021_wide)
)
# Display the table ordered by index; the sorted copy is printed only and
# is not assigned back.
Victoria_pop_age_gender_2021_wide[
  order(Victoria_pop_age_gender_2021_wide[["index"]]),
]
## # A tibble: 42 × 5
##    age_group pop_comma Women   Men index
##    <fct>     <chr>     <int> <int> <dbl>
##  1 0 to 4    1,400      1400    NA     1
##  2 0 to 4    1,510        NA  1510     2
##  3 5 to 9    1,440      1440    NA     3
##  4 5 to 9    1,395        NA  1395     4
##  5 10 to 14  1,365      1365    NA     5
##  6 10 to 14  1,340        NA  1340     6
##  7 15 to 19  1,340      1340    NA     7
##  8 15 to 19  1,375        NA  1375     8
##  9 20 to 24  3,475      3475    NA     9
## 10 20 to 24  2,565        NA  2565    10
## # … with 32 more rows
# Store the row names, converted to numbers, in a new index column; it is
# used later to keep provinces in their original order when plotting.
Canada_senior_pop_2021[["index"]] <- as.numeric(
  rownames(Canada_senior_pop_2021)
)
# Display the table ordered by index (printed only, not assigned back).
Canada_senior_pop_2021[order(Canada_senior_pop_2021[["index"]]), ]
##                     Province Year PercentageOfPop index
## 1  Newfoundland and Labrador 2016            19.4     1
## 2  Newfoundland and Labrador 2021            23.6     2
## 3       Prince Edward Island 2016            19.4     3
## 4       Prince Edward Island 2021            21.2     4
## 5                Nova Scotia 2016            19.9     5
## 6                Nova Scotia 2021            22.2     6
## 7              New Brunswick 2016            19.9     7
## 8              New Brunswick 2021            22.8     8
## 9                     Quebec 2016            18.3     9
## 10                    Quebec 2021            20.6    10
## 11                   Ontario 2016            16.7    11
## 12                   Ontario 2021            18.5    12
## 13                  Manitoba 2016            15.6    13
## 14                  Manitoba 2021            17.1    14
## 15              Saskatchewan 2016            15.5    15
## 16              Saskatchewan 2021            17.5    16
## 17                   Alberta 2016            12.3    17
## 18                   Alberta 2021            14.8    18
## 19          British Columbia 2016            18.3    19
## 20          British Columbia 2021            20.3    20
## 21                     Yukon 2016            11.9    21
## 22                     Yukon 2021            15.0    22
## 23     Northwest Territories 2016             7.7    23
## 24     Northwest Territories 2021            10.0    24
## 25                   Nunavut 2016             3.8    25
## 26                   Nunavut 2021             4.4    26

9 Plot data

9.1 plot

# Quick base-graphics look at the whole data frame via the generic plot().
plot(Victoria_pop_age_gender_2021)

9.2 Victoria ggplot - stacked bar

PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
Sort values of age_group to ensure 100+ appears in correct position
factor, str_sort unique

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Stacked bars: one bar per age group, segments filled by gender.
Vic_1 <- ggplot(
  Victoria_pop_age_gender_2021,
  aes(x = age_group, y = pop, fill = gender)
) +
  # Bar heights come straight from pop; width = 1 would leave no space
  # between bars.
  geom_col(alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
Vic_1

9.3 BC Pop ggplot - stacked bar

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Stacked bars of BC population: one bar per age group, filled by gender.
BC <- ggplot(BC_pop_2021, aes(x = age_group, y = pop, fill = gender)) +
  # Bar heights come straight from pop; width = 1 would leave no space
  # between bars.
  geom_col(alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  # Comma-formatted axis labels avoid scientific notation.
  scale_y_continuous(limits = c(0, 400000), labels = scales::comma) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
BC

9.4 BC pop ggplot - stacked column with labels

need to find the cumulative sum for each stack
https://r-graphics.org/recipe-bar-graph-labels#cb84-7

# Label height for each segment: the running total of pop within its age
# group, used as the y position of the text in the stacked column.
# NOTE(review): this assumes the row order inside each age group matches
# the order in which the segments are stacked. The table is left grouped
# by age_group (there is no ungroup()).
BC_pop_2021 <- mutate(
  group_by(BC_pop_2021, age_group),
  label_y = cumsum(pop)
)

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Stacked columns with the formatted count printed in each segment.
BC_lab <- ggplot(BC_pop_2021, aes(x = age_group, y = pop, fill = gender)) +
  geom_col() +
  # vjust > 1 pushes the text just below its label_y position.
  geom_text(aes(y = label_y, label = pop_comma), size = 2, vjust = 1.25) +
  scale_fill_manual(values = colour_palette) +
  scale_y_continuous(limits = c(0, 400000), labels = scales::comma) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )
BC_lab

9.5 Victoria pop ggplot - stacked column with labels

need to find the cumulative sum for each stack
https://r-graphics.org/recipe-bar-graph-labels#cb84-7

# Label height for each segment: the running total of pop within its age
# group, used as the y position of the text in the stacked column.
# NOTE(review): this assumes the row order inside each age group matches
# the order in which the segments are stacked. The table is left grouped
# by age_group (there is no ungroup()).
Victoria_pop_age_gender_2021 <- mutate(
  group_by(Victoria_pop_age_gender_2021, age_group),
  label_y = cumsum(pop)
)

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Stacked columns with the formatted count printed in each segment.
Vic_lab <- ggplot(
  Victoria_pop_age_gender_2021,
  aes(x = age_group, y = pop, fill = gender)
) +
  geom_col() +
  # vjust > 1 pushes the text just below its label_y position.
  geom_text(aes(y = label_y, label = pop_comma), size = 3, vjust = 1.25) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )
Vic_lab

9.6 Victoria pop ggplot - side-by-side

PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
Sort values of age_group to ensure 100+ appears in correct position
factor, str_sort unique

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side bars: the two genders sit next to each other per age group.
Vic_dodge <- ggplot(
  Victoria_pop_age_gender_2021,
  aes(x = age_group, y = pop, fill = gender)
) +
  # width = 1 would leave no space between bars.
  geom_col(position = position_dodge(), alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
Vic_dodge

9.7 BC Pop ggplot

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side bars: the two genders sit next to each other per age group.
BC_dodge <- ggplot(BC_pop_2021, aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars.
  geom_col(position = position_dodge(), alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  # Comma-formatted axis labels instead of scientific notation.
  scale_y_continuous(limits = c(0, 200000), labels = scales::comma) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
BC_dodge

9.8 Victoria seniors ggplot

PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
Sort values of age_group to ensure 100+ appears in correct position
factor, str_sort unique

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side bars for the senior age groups only.
Vic_seniors <- ggplot(
  Victoria_pop_age_gender_2021_seniors,
  aes(x = age_group, y = pop, fill = gender)
) +
  # width = 1 would leave no space between bars.
  geom_col(position = position_dodge(), alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
Vic_seniors

9.9 BC seniors ggplot

PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
Sort values of age_group to ensure 100+ appears in correct position
factor, str_sort unique

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side bars for the senior age groups only.
BC_seniors <- ggplot(
  BC_pop_2021_seniors,
  aes(x = age_group, y = pop, fill = gender)
) +
  # width = 1 would leave no space between bars.
  geom_col(position = position_dodge(), alpha = 0.95, width = 0.85) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
BC_seniors

9.10 Victoria seniors with data labels ggplot

https://intellipaat.com/community/16343/how-to-put-labels-over-geombar-for-each-bar-in-r-with-ggplot2

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side senior bars with the formatted count printed above each bar.
Vic_seniors_lab <- ggplot(
  Victoria_pop_age_gender_2021_seniors,
  aes(x = age_group, y = pop, fill = gender)
) +
  # width = 1 would leave no space between bars.
  geom_col(position = "dodge", alpha = 0.95, width = 0.85) +
  # The text is dodged with the same width as the bars (0.85) so each label
  # is centred over its own bar; a negative vjust places it above the bar.
  geom_text(aes(label = pop_comma), size = 3,
            position = position_dodge(width = 0.85), vjust = -0.25) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
Vic_seniors_lab

9.11 Victoria seniors with data labels, legend at bottom ggplot

https://intellipaat.com/community/16343/how-to-put-labels-over-geombar-for-each-bar-in-r-with-ggplot2

# Fill colours for the two gender categories (Colour brewer, colour-blind
# safe). Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Side-by-side senior bars with count labels and the legend at the bottom.
Vic_seniors_lab_leg_bot <- ggplot(
  Victoria_pop_age_gender_2021_seniors,
  aes(x = age_group, y = pop, fill = gender)
) +
  # width = 1 would leave no space between bars.
  geom_col(position = "dodge", alpha = 0.95, width = 0.85) +
  # The text is dodged with the same width as the bars (0.85) so each label
  # is centred over its own bar; a negative vjust places it above the bar.
  geom_text(aes(label = pop_comma), size = 3,
            position = position_dodge(width = 0.85), vjust = -0.25) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )
Vic_seniors_lab_leg_bot

9.12 Women pivot wide ggplot

PROBLEM (before age_group is converted to an ordered factor, see sections 3.3 and 7): age grouping is not correct, e.g. 100+ comes after 10 to 14

# Bar chart of the Women column of the wide table, one bar per age group.
# The wide table holds NA in Women on the rows that carry the Men count,
# which is why ggplot reports removed rows when this plot is drawn.
Vic_women <- ggplot(
  Victoria_pop_age_gender_2021_wide,
  aes(x = age_group, y = Women)
) +
  # width = 1 would leave no space between bars.
  geom_bar(stat = "identity", position = position_dodge(),
           alpha = 0.75, width = 0.75, fill = "#d8b365") +
  labs(
    title = "Victoria, BC Women Population by Age",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
Vic_women
## Warning: Removed 21 rows containing missing values (geom_bar).

9.13 Men pivot wide ggplot

PROBLEM (before age_group is converted to an ordered factor, see sections 3.3 and 7): age grouping is not correct, e.g. 100+ comes after 10 to 14

# Bar chart of the Men column of the wide table, one bar per age group.
# The wide table holds NA in Men on the rows that carry the Women count,
# which is why ggplot reports removed rows when this plot is drawn.
Vic_men <- ggplot(
  Victoria_pop_age_gender_2021_wide,
  aes(x = age_group, y = Men)
) +
  # width = 1 would leave no space between bars.
  geom_bar(stat = "identity", position = position_dodge(),
           alpha = 0.75, width = 0.75, fill = "#d8b365") +
  labs(
    title = "Victoria, BC Men's Population by Age",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
Vic_men
## Warning: Removed 21 rows containing missing values (geom_bar).

9.14 Canada Seniors as Percent of Population ggplot

# Fill colours for the two census years (Colour brewer, colour-blind safe).
# Earlier alternative: c("#d8b365", "#5ab4ac").
colour_palette <- c("#af8dc3", "#7fbf7b")

# Horizontal side-by-side bars: percentage of seniors per province, one bar
# per census year. Provinces are ordered by the index column.
# https://r-graph-gallery.com/267-reorder-a-variable-in-ggplot2.html
# The plot is stored in a variable so the save step below can name it
# explicitly instead of depending on whichever plot was drawn last.
Canada_seniors_plot <- Canada_senior_pop_2021 %>%
  mutate(Province = fct_reorder(Province, index)) %>%
  ggplot(aes(x = Province, y = PercentageOfPop, fill = Year)) +
  # width = 1 would leave no space between bars.
  geom_col(position = position_dodge(), alpha = 0.95, width = 0.85) +
  # The text is dodged with the same width as the bars (0.85) so each value
  # lines up with its own bar; hjust > 1 shifts it back from the bar end.
  geom_text(aes(label = PercentageOfPop), size = 3,
            position = position_dodge(width = 0.85),
            vjust = 0.5, hjust = 1.2) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 360, hjust = 1),
    legend.title = element_blank()
  ) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Canadian Seniors as Percent of Population",
    subtitle = "(2016 & 2021 Canada Census)",
    x = NULL,
    y = "% of Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  scale_y_continuous(limits = c(0, 25)) +
  coord_flip() +
  # https://www.geeksforgeeks.org/reversing-the-order-of-a-ggplot2-legend/
  guides(fill = guide_legend(reverse = TRUE))
Canada_seniors_plot

### Save file

# Saved image is 10 high and 4 * aspect_ratio wide (ggsave default units).
aspect_ratio <- 2.5
ggsave("Canada_senior_pop_perc_2016_2021.jpg",
       plot = Canada_seniors_plot,
       height = 10, width = 4 * aspect_ratio)

10 Patchwork

# Combine previously built plots with patchwork operators:
# `+` adds plots to one layout, `/` places one plot above the other.
BC + BC_dodge # compare stacked to side-by-side

# BC stacked, without and with segment labels
BC + BC_lab

# Victoria stacked, with and without segment labels
Vic_lab + Vic_1

# Victoria vs BC, side-by-side bars
Vic_dodge + BC_dodge

# Victoria men vs women (built from the wide table, hence the NA warnings)
Vic_men + Vic_women
## Warning: Removed 21 rows containing missing values (geom_bar).
## Removed 21 rows containing missing values (geom_bar).

# Seniors: BC vs Victoria
BC_seniors + Vic_seniors

# Victoria seniors, without and with bar labels
Vic_seniors + Vic_seniors_lab

# Labelled Victoria seniors: legend at the side vs at the bottom
Vic_seniors_lab + Vic_seniors_lab_leg_bot

# Same pair, one above the other
Vic_seniors_lab / Vic_seniors_lab_leg_bot

Vic_1 + Vic_dodge # compare stacked to side-by-side

# BC pair on the top row, Victoria pair on the bottom row
(BC + BC_dodge) / (Vic_1 + Vic_dodge)

# All four plots in one layout; widths gives the relative column widths
BC + BC_dodge + Vic_1 + Vic_dodge +
  plot_layout(widths = c(2, 1))

11 Function census_age_gender_ggplot and save all in one

# Build a side-by-side age-by-gender population bar chart with rotated
# count labels and save it to a file.
#
# Arguments:
#   data             data frame whose first three columns are, in order,
#                    age group, gender and population count; any further
#                    columns keep their own names and are ignored
#   colpat1, colpat2 the two fill colours passed to scale_fill_manual()
#   title, subtitle, caption  text passed to labs()
#   labelsize        text size of the bar labels
#   filename         output file passed to ggsave()
#   aspect_ratio     the image is saved 10 high and 4 * aspect_ratio wide
#                    (ggsave default units)
#   big_mark         optional thousands separator for the bar labels, e.g.
#                    ","; NULL (default) labels the bars with pop as is
#
# Returns whatever ggsave() returns. Assigning the result therefore does
# not give a plot object.
census_age_gender_ggplot_save <- function(data, colpat1, colpat2,
                                          title, subtitle, caption, labelsize,
                                          filename, aspect_ratio = 2.5,
                                          big_mark = NULL) {

  # clean data
  data[data == "Women+"] <- "Women"
  data[data == "Men+"] <- "Men"
  # Rename only the three leading columns so that extra columns (such as
  # pop_comma or label_y) do not end up with NA names.
  colnames(data)[1:3] <- c("age_group", "gender", "pop")

  # sort age values into natural numeric order so "100+" is not placed
  # next to "10 to 14"
  data$age_group <- factor(
    data$age_group,
    levels = str_sort(unique(data$age_group), numeric = TRUE)
  )

  # Colour brewer color-blind safe
  colour_palette <- c(colpat1, colpat2)

  # Bar label text: raw pop by default, or an integer string with the
  # requested thousands separator.
  pop_label <- function(x) {
    if (is.null(big_mark)) {
      x
    } else {
      formatC(x, format = "d", big.mark = big_mark)
    }
  }

  p <- ggplot(data, aes(x = age_group, y = pop, fill = gender)) +
    # width = 1 would leave no space between bars
    geom_col(position = "dodge", alpha = 0.95, width = 0.85) +
    # Labels are rotated 90 degrees and dodged with the same width as the
    # bars (0.85) so each one lines up with its own bar; hjust just above 1
    # pulls the text back from the bar end.
    geom_text(aes(label = pop_label(pop)), size = labelsize,
              position = position_dodge(width = 0.85),
              vjust = 0.35, hjust = 1.05, angle = 90) +
    theme_light() +
    theme(
      plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank()
    ) +
    scale_fill_manual(values = colour_palette) +
    labs(title = title,
         subtitle = subtitle,
         x = "Age Group",
         y = "Population",
         caption = caption)

  # Pass the plot explicitly instead of relying on ggsave()'s default of
  # the most recently created plot.
  ggsave(filename, plot = p, height = 10, width = 4 * aspect_ratio)
}

11.1 Plot and save file Using Function census_age_gender_ggplot

without the pop_comma for thousands (can’t figure out how to create in function)

# Plot and save the Victoria seniors data in one call. The function returns
# the saved file name, so printing p1 shows the file name, not the plot.
p1 <- census_age_gender_ggplot_save(
        data = Victoria_pop_age_gender_2021_seniors,
        colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
        title = "Victoria Seniors Population \n by Age & Gender",
        subtitle = "(2021 Canada Census)",
        caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
        labelsize = 3,
        filename = "Victoria_pop_age_gender_2021_seniors.jpg"
)
p1
## [1] "Victoria_pop_age_gender_2021_seniors.jpg"
# Left disabled: formatting pop with a thousands separator like this
# this changes values of y axis,
# BC_pop_2021$pop <- formatC(BC_pop_2021$pop, format = "d", big.mark = ",")

# Same chart for the full BC population, saved under its own file name
p2 <- census_age_gender_ggplot_save(data = BC_pop_2021,
         colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
         title = "BC Population \n by Age & Gender",
         subtitle = "(2021 Canada Census)",
         caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
         labelsize = 3,
         filename = "BC_pop_2021.jpg"
)
p2
## [1] "BC_pop_2021.jpg"
# Same chart for the BC seniors data
p3 <- census_age_gender_ggplot_save(data = BC_pop_2021_seniors,
         colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
         title = "BC Seniors Population \n by Age & Gender",
         subtitle = "(2021 Canada Census)",
         caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
         labelsize = 3,
         filename = "BC_pop_2021_seniors.jpg"
)
p3
## [1] "BC_pop_2021_seniors.jpg"

12 Function census_age_gender_ggplot

  • Separate plot creation from filesave
#' Build a dodged population bar chart by age group and gender
#'
#' The input columns are renamed positionally to `age_group`, `gender` and
#' `pop`, so `data` must supply them in that order. Gender values "Women+" and
#' "Men+" are relabelled "Women" and "Men", and age groups are ordered with a
#' numeric-aware sort so that e.g. "5 to 9" precedes "10 to 14".
#'
#' @param data Data frame with age group, gender and population count columns
#'   (in that order).
#' @param colpat1,colpat2 Fill colours passed, in this order, to
#'   `scale_fill_manual()`.
#' @param title,subtitle,caption Text for the corresponding plot labels.
#' @param labelsize Text size of the population labels drawn on the bars.
#' @return The ggplot object. It is returned visibly, so calling the function
#'   at top level without assigning the result displays the plot.
census_age_gender_ggplot <- function(
  data, colpat1, colpat2, title, subtitle, caption, labelsize) {

  # clean data
  data[data == "Women+"] <- "Women"
  data[data == "Men+"] <- "Men"
  colnames(data) <- c("age_group", "gender", "pop")

  # sort age values
  data$age_group <- factor(
    data$age_group,
    levels = str_sort(unique(data$age_group), numeric = TRUE)
  )

  # Colour brewer color-blind safe
  colour_palette <- c(colpat1, colpat2)

  # ggplot
  age_group_plot <- ggplot(
    data, aes(x = age_group, y = pop, fill = gender)
  ) +
    # width = 1 leaves no space between bars
    geom_bar(stat = "identity", position = "dodge",
             alpha = 0.95, width = 0.85) +
    # a negative vjust shows the label above the bar
    geom_text(aes(label = pop), size = labelsize,
              position = position_dodge(width = 0.9),
              vjust = 0.35, hjust = 1.05, angle = 90) +
    theme_light() +
    theme(
      plot.title = element_text(
        size = rel(1.5), face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank()
    ) +
    scale_fill_manual(values = colour_palette) +
    labs(title = title,
         subtitle = subtitle,
         x = "Age Group",
         y = "Population",
         caption = caption)

  # Ending on the assignment would return the plot invisibly; name it here so
  # the result is visible to the caller.
  age_group_plot
}

12.1 Function to Save ggplot

#' Save a ggplot to a file with the script's standard dimensions
#'
#' @param filename Path of the image file to write; handed to `ggsave()`.
#' @param aspect_ratio Multiplier for the saved width: the file is written
#'   with height 10 and width `4 * aspect_ratio`.
#' @param plot Plot to save. Defaults to `last_plot()`, which is what
#'   `ggsave()` uses when no plot is supplied, so existing calls that pass
#'   only a file name behave as before.
#' @return The value of `ggsave()` (the file name, invisibly).
save_ggplot <- function(filename, aspect_ratio = 2.5, plot = last_plot()) {
  ggsave(filename, plot = plot, height = 10, width = 4 * aspect_ratio)
}

12.2 Plot and save file Using separate save function

# Build the Victoria seniors plot, display it, then save it in a second step.
p1_plot <- census_age_gender_ggplot(
    data = Victoria_pop_age_gender_2021_seniors,
     colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
     title = "Victoria Seniors Population \n by Age & Gender",
     subtitle = "(2021 Canada Census)",
     caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
     labelsize = 3
)
p1_plot

# save_ggplot() is given only a file name here, so which plot gets written is
# decided by ggsave()'s default -- presumably the plot shown just above; verify.
p1_save <- save_ggplot(
  filename = "Victoria_pop_age_gender_2021_seniors_1.jpg")
p1_save
## [1] "Victoria_pop_age_gender_2021_seniors_1.jpg"
# Left disabled: formatting pop with a thousands separator like this
# this changes values of y axis,
# BC_pop_2021$pop <- formatC(BC_pop_2021$pop, format = "d", big.mark = ",")

# Same two steps for the full BC population
p2_plot <- census_age_gender_ggplot(
     data = BC_pop_2021,
     colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
     title = "BC Population \n by Age & Gender",
     subtitle = "(2021 Canada Census)",
     caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
     labelsize = 3
)
p2_plot

p2_save <- save_ggplot(filename = "BC_pop_2021_1.jpg")
p2_save
## [1] "BC_pop_2021_1.jpg"
# Same two steps for the BC seniors data
p3_plot <- census_age_gender_ggplot(
     data = BC_pop_2021_seniors,
     colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
     title = "BC Seniors Population \n by Age & Gender",
     subtitle = "(2021 Canada Census)",
     caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
     labelsize = 3
)
p3_plot

p3_save <- save_ggplot(filename = "BC_pop_2021_seniors_1.jpg")
p3_save
## [1] "BC_pop_2021_seniors_1.jpg"

13 Session info

# Record the R version, platform, and the attached and loaded package
# versions used to run this script, so the results can be reproduced.
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.1.1 forcats_0.5.1   stringr_1.4.0   dplyr_1.0.9    
##  [5] purrr_0.3.4     readr_1.4.0     tidyr_1.2.0     tibble_3.1.7   
##  [9] ggplot2_3.3.5   tidyverse_1.3.1
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7        lubridate_1.7.10  assertthat_0.2.1  digest_0.6.27    
##  [5] utf8_1.2.1        R6_2.5.0          cellranger_1.1.0  backports_1.1.10 
##  [9] reprex_2.0.0      evaluate_0.15     httr_1.4.2        highr_0.9        
## [13] pillar_1.7.0      rlang_1.0.2       readxl_1.3.1      rstudioapi_0.13  
## [17] jquerylib_0.1.4   rmarkdown_2.15    textshaping_0.2.1 labeling_0.4.2   
## [21] munsell_0.5.0     broom_0.8.0       compiler_4.0.2    modelr_0.1.8     
## [25] xfun_0.30         pkgconfig_2.0.3   systemfonts_1.0.1 htmltools_0.5.2  
## [29] tidyselect_1.1.2  fansi_0.5.0       crayon_1.4.1      dbplyr_2.1.1     
## [33] withr_2.4.2       grid_4.0.2        jsonlite_1.7.2    gtable_0.3.0     
## [37] lifecycle_1.0.1   DBI_1.1.1         magrittr_2.0.3    scales_1.2.0     
## [41] cli_3.3.0         stringi_1.6.2     farver_2.1.0      fs_1.5.2         
## [45] xml2_1.3.2        bslib_0.3.1       ellipsis_0.3.2    ragg_0.4.0       
## [49] generics_0.1.0    vctrs_0.4.1       tools_4.0.2       glue_1.6.2       
## [53] hms_1.0.0         fastmap_1.1.0     yaml_2.2.1        colorspace_2.0-1 
## [57] rvest_1.0.0       knitr_1.39        haven_2.5.0       sass_0.4.0.9000